In [1]:
    
import pylearn2.utils
import pylearn2.config
import theano
import neukrill_net.dense_dataset
import neukrill_net.utils
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import holoviews as hl
%load_ext holoviews.ipython
import sklearn.metrics
    
    
    
    
    
    
This is the model without any constraints on kernel or col norms. We want to see what value col_norms_mean settles to, so that we can set these constraints at 80% of that value.
In [135]:
    
# Load the saved model from the run without norm constraints and bind it to `m`;
# the cells below read its monitor channels.
m = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/quicker_learning_1_fc_layer_experiment_no_norms_repeat_recent.pkl")
    
In [67]:
    
def make_curves(model, *args):
    """Build one holoviews Curve per monitor channel and combine them with ``+``.

    Parameters
    ----------
    model : object
        Anything exposing ``monitor.channels``, a mapping from channel name to
        a record with ``example_record`` and ``val_record`` sequences.
    *args : str
        Names of the channels to plot, in display order.

    Returns
    -------
    The curves combined with ``+`` in the order given, a single Curve when
    only one name is passed, or ``None`` when no names are passed.
    """
    curves = None
    for name in args:
        # Read from the model that was passed in, not from the notebook-global `m`.
        channel = model.monitor.channels[name]
        # The group label is the channel name with its first character upper-cased.
        group = name[0].upper() + name[1:]
        # list(...) so the pairs are materialised on Python 3 as well as Python 2.
        points = list(zip(channel.example_record, channel.val_record))
        curve = hl.Curve(points, group=group)
        # Compare against None explicitly so the result never depends on the
        # truthiness of a holoviews object.
        if curves is None:
            curves = curve
        else:
            curves += curve
    return curves
    
Plot all col_norms in the fully connected and softmax composite layers.
In [68]:
    
# Channel names containing all of "mean", "norm" and "col", in sorted order.
means = sorted(
    c for c in m.monitor.channels
    if all(tag in c for tag in ("mean", "norm", "col"))
)
make_curves(m, *means)
    
    Out[68]:
Plot all kernel_norms_mean in convolutional layers.
In [69]:
    
# Channel names containing all of "mean", "norm" and "kernel", in sorted order.
means = sorted(
    c for c in m.monitor.channels
    if all(tag in c for tag in ("mean", "norm", "kernel"))
)
make_curves(m, *means)
    
    Out[69]:
We think that the first (and in this model - the only) fully connected layer is indicative of the value we're after.
In [136]:
    
# Overlay the max, min and mean column-norm channels of layer h4 against epoch.
# `channel` is left bound to the mean channel, which the following polyfit cell reads.
channel = m.monitor.channels["train_h4_col_norms_max"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_min"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_mean"]
plt.plot(channel.epoch_record, channel.val_record)
    
    Out[136]:
    
An attempt to find the saturating value of the mean by fitting a parabola to it and taking the min.
In [137]:
    
# Fit a degree-2 polynomial to the values of `channel` (bound by the preceding
# plot cell) as a function of epoch.
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
# Evaluate the fit at integer epochs 0..199 and keep the smallest fitted value.
sat = min(p(range(200)))
print(sat)
    
    
In [138]:
    
# 80% of the fitted value `sat` computed in the preceding cell.
sat * 0.8
    
    Out[138]:
That's the 80%.
In [139]:
    
# Overlay the max, min and mean kernel-norm channels of layer h1.
# No x values are passed, so these are plotted against record index rather than epoch_record.
# `channel` is left bound to the mean channel, which the following polyfit cell reads.
channel = m.monitor.channels["train_h1_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h1_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h1_kernel_norms_mean"]
plt.plot(channel.val_record)
    
    Out[139]:
    
In [140]:
    
# Fit a degree-2 polynomial to the values of `channel` (bound by the preceding
# plot cell) as a function of epoch.
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
# Evaluate the fit at integer epochs 0..199 and keep the largest fitted value.
sat = max(p(range(200)))
sat
    
    Out[140]:
In [141]:
    
# 80% of the fitted value `sat` computed in the preceding cell.
sat * 0.8
    
    Out[141]:
80% of layer 1 kernel_norm_mean
In [142]:
    
# Overlay the max, min and mean kernel-norm channels of layer h2.
# No x values are passed, so these are plotted against record index rather than epoch_record.
# `channel` is left bound to the mean channel, which the following polyfit cell reads.
channel = m.monitor.channels["train_h2_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h2_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h2_kernel_norms_mean"]
plt.plot(channel.val_record)
    
    Out[142]:
    
In [143]:
    
# Fit a degree-2 polynomial to the values of `channel` (bound by the preceding
# plot cell) as a function of epoch.
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
# Evaluate the fit at integer epochs 0..199 and keep the largest fitted value.
sat = max(p(range(200)))
sat
    
    Out[143]:
In [144]:
    
# 80% of the fitted value `sat` computed in the preceding cell.
sat * 0.8
    
    Out[144]:
80% of layer 2 kernel_norm_mean
In [145]:
    
# Overlay the max, min and mean kernel-norm channels of layer h3.
# No x values are passed, so these are plotted against record index rather than epoch_record.
# `channel` is left bound to the mean channel, which the following polyfit cell reads.
channel = m.monitor.channels["train_h3_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h3_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h3_kernel_norms_mean"]
plt.plot(channel.val_record)
    
    Out[145]:
    
In [146]:
    
# Fit a degree-2 polynomial to the values of `channel` (bound by the preceding
# plot cell) as a function of epoch.
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
# Evaluate the fit at integer epochs 0..199 and keep the largest fitted value.
sat = max(p(range(200)))
sat
    
    Out[146]:
In [147]:
    
# 80% of the fitted value `sat` computed in the preceding cell.
sat * 0.8
    
    Out[147]:
80% of layer 3 kernel_norm_mean
Attempt to set the weights with Gavin. Went wrong.
In [186]:
    
# Rebind `m` to the model saved by the colnorms-setting experiment; this replaces
# the model previously bound to `m`.
m = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_recent.pkl")
    
In [117]:
    
# `pl` is imported here rather than in the top import cell; every later cell
# that uses `pl` relies on this cell having been run first.
import neukrill_net.plotting as pl
# Presumably plots the validation NLL channel of `m` against epoch — confirm
# against neukrill_net.plotting.monitor_channels.
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")
    
    Out[117]:
In [113]:
    
%%opts HeatMap style(cmap='gray')
# The cell magic above must stay on the first line of the cell; it applies a
# gray colormap to HeatMap elements produced below.
pl.model_weights(m)
    
    Out[113]:
In [120]:
    
# Pass every channel of `m` whose name contains both "norm" and "max", with epoch as the x axis.
pl.monitor_channels(m, [c for c in m.monitor.channels if "norm" in c and "max" in c], x_axis = "epoch")
    
    Out[120]:
In [119]:
    
# Development leftover: re-imports the plotting module so edits to it are picked up.
# NOTE(review): `reload` is a builtin on Python 2 only; this cell fails on Python 3.
reload(pl)
    
    Out[119]:
Attempt to set weights with Matt. Looks like it didn't break yet!
In [184]:
    
# Rebind `m` from the same path as the earlier colnorms load.
# NOTE(review): presumably the file on disk was overwritten by a newer run
# between the two loads — confirm, since the path alone cannot tell the runs apart.
m = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_recent.pkl")
    
In [151]:
    
# Presumably plots the validation NLL channel of `m` against epoch — confirm
# against neukrill_net.plotting.monitor_channels.
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")
    
    Out[151]:
In [152]:
    
# Pass every channel of `m` whose name contains both "norm" and "max", with epoch as the x axis.
pl.monitor_channels(m, [c for c in m.monitor.channels if "norm" in c and "max" in c], x_axis = "epoch")
    
    Out[152]:
In [134]:
    
# Overlay the max, min and mean column-norm channels of layer h4 against epoch
# for the model currently bound to `m`.
channel = m.monitor.channels["train_h4_col_norms_max"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_min"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_mean"]
plt.plot(channel.epoch_record, channel.val_record)
    
    Out[134]:
    
The same model with dropout set to 0.9.
In [183]:
    
# Load the dropout variant of the colnorms experiment into its own name, `m_drop`,
# so it can be compared with `m`.
m_drop = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_dropout_recent.pkl")
    
In [163]:
    
# Combine the validation-NLL and training-NLL outputs for `m_drop` with `+`
# (presumably holoviews objects laid out side by side — confirm).
pl.monitor_channels(m_drop, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m_drop, ["train_y_y_1_nll"], x_axis = "epoch")
    
    Out[163]:
Compare how fast the original and the dropout models are going down.
In [185]:
    
# Validation NLL of `m` combined with that of `m_drop` using `+`, both with epoch as the x axis.
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m_drop, ["valid_y_y_1_nll"], x_axis = "epoch")
    
    Out[185]:
The model with more augmentations and no dropout (set to 1).
In [175]:
    
# Load the augmentation variant of the colnorms experiment into its own name, `m_aug`.
m_aug = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_aug_recent.pkl")
    
In [179]:
    
# Validation NLL of `m_aug` combined with that of `m` using `+`, both with epoch as the x axis.
pl.monitor_channels(m_aug, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")
    
    Out[179]:
In [182]:
    
# Take the channel names from m_aug itself rather than from `m`, so every name
# passed on is a channel of the model being plotted and none of its
# "norms_mean" channels are missed.
pl.monitor_channels(m_aug, [c for c in m_aug.monitor.channels if "norms_mean" in c], x_axis = "epoch")
    
    Out[182]: